
clear all
set mem 5000m
set more off
set seed 12345

*global path = "C:/Users/Kevin Shih/Dropbox/Work/Research Projects/Active/chinese_students_2017/"
global path = "C:/Users/KShih/Dropbox/Work/Research Projects/Active/chinese_students_2017/"





***THIS FILE PRODUCES INFERENCE CHECKS
*1. Main results using BHJ aggregation -- show it is same coeff
*2. BHJ Exposure Robust SEs -- i.e. BHJ main result, with industry level controls included on top of everything >
*2. Adao et al SEs >
*3 BHJ Mutual correlation of shifters, trying different industry clustering >
*4 Province cluster? DONE
*5 Conley clusters? DONE


global controls "chinatariff00_expw97 share_revenue_exw97 inputtariff02_expw97 contract_cons_exw97"











	
	cd "$path/Data/6.AnalysisData/bhj_ss_analysis_July2021"

	**load data and create 0213 changes in students
	use "$path/Submissions/ReStat/Replication Files 2022/kswxy_data", clear

	drop if citycode==.
	sort citycode year

	keep if year==2013
	keep citycode shc_stud_tot_0213 shc_stud_tot_0413 iv_ntr contract_cons_exw97 chinatariff00_expw97 inputtariff02_expw97 share_revenue_exw97




	**SSAGGREGATE TO SHOCK LEVEL
	gen constant =1
	local controls1 "constant"
	local controls2 "chinatariff00_expw97"
	local controls3 "chinatariff00_expw97 share_revenue_exw97"
	local controls4 "chinatariff00_expw97 share_revenue_exw97 inputtariff02_expw97"
	local controls5 "chinatariff00_expw97 share_revenue_exw97 inputtariff02_expw97 contract_cons_exw97"

	ssaggregate shc_stud_tot_0213 iv_ntr, n(isic) s(beta_pi_hasshock) l(citycode) controls("`controls1'" "`controls2'" "`controls3'" "`controls4'" "`controls5'") sfilename(exp97shares_long) addmissing 


	//merge ntrgaps
	*merge in NTR gaps
	gen isic_str=string(isic)
	merge m:1 isic_str using "$path/Data/PSdata/isic_gap_manuf.dta"
	keep if _merge==3
	drop _merge
	rename s1999 ntrgap


	unique isic // should be 119


	
	//prepare industry controls to merge in
		preserve	
			//Contract intensity, 1997
			use "$path/Data/Nunn/contract_intensity.dta", clear
			tostring isic, gen(isic_str)
			keep isic_str contract_cons contract_liberal
			rename contract_cons contract_cons1997 
			rename contract_liberal contract_liberal1997
			tempfile contract_intensity
			save "`contract_intensity'"
			
			//Export Requirements, 2000
			use "$path/Data/Export Licenses/export_req.dta", clear
			keep if year==2000
			tostring isic, gen(isic_str)
			keep isic_str share_revenue fraction_nfirm
			rename share_revenue share_revenue2000
			rename fraction_nfirm fraction_nfirm2000
			tempfile export_req
			save "`export_req'"
			
			//Import tariffs, 2000
			use "$path/Data/Tariffs/chinatariffs.dta", clear
			keep if year==2000
			tostring isic, gen(isic_str)
			keep isic_str chinatariff_isic
			rename chinatariff_isic importtariff2000
			replace importtariff2000 = log(1+importtariff2000/100)
			tempfile import_tariff
			save "`import_tariff'"
			
			//Input tariffs, 2002
			use "$path/Data/Tariffs/inputtariffs.dta", clear
			keep isic_str inputtariff_isic
			rename inputtariff_isic inputtariff2002
			compress
			tempfile input_tariff
			save "`input_tariff'"
			
			//Other firm vars from ASIP, 2000
			use ASIP2000, clear
			
			gen labor_va_2000 = laborpayment / va
			gen cap_va_2000 = fixedasset/va 
			gen roa_2000 = profit/asset
			gen roe_2000 = profit/equity_tot
				
			collapse (mean) labor_va cap_va roa roe, by(isic)
			
			rename isic isic_str
			
			tempfile indvars00
			save "`indvars00'"
		restore	
			
	
	//merge in industry controls	
		merge m:1 isic_str using "`contract_intensity'"
				**impute missing export_req with isic3 or isic2 medians
					gen isic3 = substr(isic_str,1,3)
					gen isic2 = substr(isic_str,1,2)
					foreach var in contract_liberal1997 contract_cons1997 {
					egen misic3_`var' = median(`var'), by(isic3)
					egen misic2_`var' = median(`var'), by(isic2)
					replace `var' = misic3_`var' if `var'==.
					replace `var' = misic2_`var' if `var'==.
					}
					drop isic3 isic2 misic*
		drop if _merge==2
		drop _merge
		
		merge m:1 isic_str using "`export_req'"
				**impute missing export_req with isic3 or isic2 medians
					gen isic3 = substr(isic_str,1,3)
					gen isic2 = substr(isic_str,1,2)
					foreach var in share_revenue2000 fraction_nfirm2000 {
					egen misic3_`var' = median(`var'), by(isic3)
					egen misic2_`var' = median(`var'), by(isic2)
					replace `var' = misic3_`var' if `var'==.
					replace `var' = misic2_`var' if `var'==.
					}
					drop isic3 isic2 misic*
		drop if _merge==2
		drop _merge
		
		merge m:1 isic_str using "`import_tariff'"
		replace importtariff2000 = ln(1+(importtariff2000 /100))
		drop if _merge==2
		drop _merge
		
		merge m:1 isic_str using "`input_tariff'"
			**impute missing input_tariff with isic3 or isic2 medians
					gen isic3 = substr(isic_str,1,3)
					gen isic2 = substr(isic_str,1,2)
					gen isic1 = substr(isic_str,1,1)
					foreach var in inputtariff2002 {
					egen misic3_`var' = median(`var'), by(isic3)
					egen misic2_`var' = median(`var'), by(isic2)
					egen misic1_`var' = median(`var'), by(isic1)
					replace `var' = misic3_`var' if `var'==.
					replace `var' = misic2_`var' if `var'==.
					replace `var' = misic1_`var' if `var'==.
					}
		drop isic3 isic2 isic1 misic*
		drop if _merge==2
		drop _merge
		
		
		merge m:1 isic_str using "`indvars00'"
			**impute missing input_tariff with isic3 or isic2 medians
					gen isic3 = substr(isic_str,1,3)
					gen isic2 = substr(isic_str,1,2)
					gen isic1 = substr(isic_str,1,1)
					foreach var in labor_va cap_va roa roe {
					egen misic3_`var' = median(`var'), by(isic3)
					egen misic2_`var' = median(`var'), by(isic2)
					egen misic1_`var' = median(`var'), by(isic1)
					replace `var' = misic3_`var' if `var'==.
					replace `var' = misic2_`var' if `var'==.
					replace `var' = misic1_`var' if `var'==.
					}
		drop isic3 isic2 isic1 misic*
		drop if _merge==2
		
		drop if isic==.
		
		
		**BHJ Aggregated Industry level regression
		rename iv_ntr5 iv_ntr
		eststo bhj: ivreg shc_stud_tot_02135 (iv_ntr=ntrgap) [aw=s_n], rob

		
		**BHJ Exposure Robust, USE iv_ntr5 which partials out all city level controls
		eststo exp_rob: ivreg shc_stud_tot_02135 (iv_ntr=ntrgap) share_revenue2000 importtariff2000  [aw=s_n], rob

		**BHJ ICC checks
		gen isic_2=int(isic/100)
		gen isic_3=int(isic/10)
		gen isic_4=isic

			
		*Intra cluster correlation checks
		eststo icc_3: ivreg2 shc_stud_tot_02135 (iv_ntr=ntrgap) [aw=s_n], cluster(isic_3)
		
		
		eststo icc_2: ivreg2 shc_stud_tot_02135 (iv_ntr=ntrgap) [aw=s_n] , cluster(isic_2) 
		boottest iv_ntr, cluster(isic_2) bootcluster(isic_2)  boottype(wild) 
		
		matrix CI=r(CI)
		local c_low: display %5.3f round(CI[1,1], 0.001)
		local c_up: display %5.3f round(CI[1,2], 0.001)
		estadd local CI="[`c_low',`c_up']": icc_2
		estadd local pv=round(r(p),0.001): icc_2
		di "`CI'"

		
		esttab bhj exp_rob icc_3 icc_2, b(3) se(3) keep(iv_ntr) star(* 0.10 ** 0.05 *** 0.01) stats(CI pv)

		rename iv_ntr iv_ntr5 
		
		
		
		
		**SAVE THIS TEMPFILE OF ISIC CODES FOR ADAO
		keep isic_3 isic_4
		tempfile isic_3
		save `isic_3'
		
		
		
		
		
		
		
		

		**ADAO ET AL STANDARD ERRORS
		
		**load data and create 0213 changes in students
		use "$path/Submissions/ReStat/Replication Files 2022/kswxy_data", clear

		drop if citycode==.
		sort citycode year

		**merge 1997 export shares wide form
		merge m:1 chinacity using "$path\Data\6.AnalysisData\wide\expshare97_NTR_wide_pref_rev.dta"
		*merge m:1 citycode using "$path\Data\6.AnalysisData\bhj_ss_analysis_July2021\exp97shares_wide.dta"
		drop if _merge==2
		drop _merge
		
		/*
			//must replace missing with 0s
			foreach var of varlist beta_pi1511-beta_pi3699 {
					replace `var' = 0 if `var'==.
			}
		*/

		
		*gen AKM_1=iv_ntr
		*gen AKM_0 =iv_ntr

		sum iv_ntr if year==2013 & balanced==1 & shc_stud_tot_0213!=., de
		local iqr_ivntr = r(p75) - r(p25)
				
		

		keep if year==2013 & balanced==1 & shc_stud_tot_0213!=.
		
		eststo adao: reg_ss shc_stud_tot_0213, shiftshare_var(iv_ntr) share_varlist(beta_pi1511-beta_pi3699) akmtype(1) control_varlist($controls) alpha(0.10)
		estadd local iqreffect = round(_b[iv_ntr]*`iqr_ivntr'*1000,1)
				local AKM_se1=e(se)
				local round_se=round(`AKM_se1',0.001)
				local AKM_1se="(0"+"`round_se'"+")"
				matrix CI_low= e(CI_low)
				*local CI_low=round(CI_low[1,1],0.0001)
				local CI_low: display %5.3f round(CI_low[1,1],0.001)
				matrix CI_upp= e(CI_upp)
				*local CI_upp=round(CI_upp[1,1],0.0001)
				local CI_upp: display %5.3f round(CI_upp[1,1],0.001)
				
				*local CI_AKM0="[0"+"`CI_low'"+","+"0"+"`CI_upp']"
				*di "`CI_AKM0'"
				*estadd local CI_AKM0_s = "`CI_AKM0'": H2
				estadd local CI_AKM_s="[`CI_low',`CI_upp']": adao
				
				
				
		**to get CI
			reg_ss shc_stud_tot_0213, shiftshare_var(iv_ntr) share_varlist(beta_pi1512-beta_pi3699) akmtype(0) control_varlist($controls) alpha(0.10)
				ereturn list
				estadd local iqreffect = round(_b[iv_ntr]*`iqr_ivntr'*1000,1)
				local AKM_se0=e(se)
				local round_se=round(`AKM_se0',0.001)
				local AKM_0se="(0"+"`round_se'"+")"
				di "`AKM_1se'" 
				di "`AKM_0se'"
				matrix CI_low= e(CI_low)
				*local CI_low=round(CI_low[1,1],0.0001)
				local CI_low: display %5.3f round(CI_low[1,1],0.001)
				matrix CI_upp= e(CI_upp)
				*local CI_upp=round(CI_upp[1,1],0.0001)
				local CI_upp: display %5.3f round(CI_upp[1,1],0.001)
				
				*local CI_AKM0="[0"+"`CI_low'"+","+"0"+"`CI_upp']"
				*di "`CI_AKM0'"
				*estadd local CI_AKM0_s = "`CI_AKM0'": H2
				estadd local CI_AKM0_s="[`CI_low',`CI_upp']": adao
				
				
				
				*label var AKM_1 "AKM Shift-share Method"
				esttab adao, keep(iv_ntr) b(3) se(3) star(* 0.10 ** 0.05 ** 0.01) stats(CI_AKM_s CI_AKM0_s) 
		***		
		***		
				
		


		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		**CONLEY PROVINCE CLUSTER
		use "$path/Submissions/ReStat/Replication Files 2022/kswxy_data", clear

		drop if citycode==.
		sort citycode year

		
		merge n:1 citycode using "$path/Data/4.MapData/Citylatlon.dta"
		drop if _m==2
		drop _m
		
		drop if _lat==.
		drop if _lon==.
		eststo clust_con50: acreg shc_stud_tot_0213 iv_ntr $controls if year==2013 & balanced==1 & _lat!=. & _lon!=., latitude(_lat) longitude(_lon) dist(50) spatial bartlett
		eststo clust_con100: acreg shc_stud_tot_0213 iv_ntr $controls if year==2013 & balanced==1 , latitude(_lat) longitude(_lon) dist(100) spatial bartlett
		eststo clust_con200: acreg shc_stud_tot_0213 iv_ntr $controls if year==2013 & balanced==1 , latitude(_lat) longitude(_lon) dist(200) spatial bartlett
		
		gen province_cluster = int(citycode/100) //need to create this as province vars have a few missing
		eststo clust_prov: reg shc_stud_tot_0213 iv_ntr $controls if year==2013 & balanced==1 , vce(cluster province_cluster)

		
		esttab clust_con50 clust_con100 clust_con200 clust_prov, b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) keep(iv_ntr)
		
		
		
		
		
		
		
		
	**MAKE TABLE
	**ADD NUMBER OF CLUSTERS!!
	estadd local clusters = 57: icc_3
	estadd local clusters = 22: icc_2
	estadd local clusters = 30: clust_prov
	
	/*
	*with adao
	esttab bhj icc_3 icc_2 adao clust_con50 clust_con100 clust_con200 clust_prov using "$path/Submissions/ReStat/Replication Files 2022/table_inference.tex", replace ///
			booktabs b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) keep(iv_ntr) coeflabel(iv_ntr "PNTR Exposure") ///
			mtitles("\shortstack{BHJ Exposure-\\Robust SEs}" "\shortstack{BHJ Exposure-\\Robust SEs\\Cluster on\\3-digit ISIC}" "\shortstack{BHJ Exposure-\\Robust SEs\\Cluster on\\2-digit ISIC}" "AKM SEs" ///
			"\shortstack{Conley Spatial SEs\\(50 KM Distance)}" "\shortstack{Conley Spatial SEs\\(100KM Distance)}" "\shortstack{Conley Spatial SEs\\(200KM Distance)}" "\shortstack{Cluster on\\Province}") ///
			stats(CI pv CI_AKM_s CI_AKM0_s, label("Wild Bootstrap CI" "Wild Bootstrap p-value" "AKM CI" "AKM0 CI")) nonotes
	*/
	
	*without adao			
	esttab bhj exp_rob icc_3 icc_2 clust_con50 clust_con100 clust_con200 clust_prov using "$path/Submissions/ReStat/Replication Files 2022/table_inference.tex", replace ///
			booktabs b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) keep(iv_ntr) coeflabel(iv_ntr "PNTR Exposure") ///
			mtitles("\shortstack{BHJ\\Shock-Level\\Regression}" "\shortstack{BHJ Exposure-\\Robust SEs}" "\shortstack{BHJ\\Cluster on\\3-digit ISIC}" "\shortstack{BHJ\\Cluster on\\2-digit ISIC}" ///
			"\shortstack{Conley Spatial SEs\\(50 KM Distance)}" "\shortstack{Conley Spatial SEs\\(100KM Distance)}" "\shortstack{Conley Spatial SEs\\(200KM Distance)}" "\shortstack{Cluster on\\Province}") ///
			stats(clusters, label("Number of Clusters")) nonotes
